Executive Summary

Target Audience
- Potential and existing hosts

Purpose
- Use our analysis to find out how hosts can stay competitive during this period (political unrest, COVID-19) across different room types

Making sure all the correct libraries and installations are complete

Importing our data

Initial Data Exploration

Filtering our sub-dataframes

# Keep the raw price string in its own column before it is overwritten below.
listings$price_str <- listings$price
# Strip "$" and "," from the price string and convert it to numeric.
listings$price <- as.numeric(gsub("[$,]", "", listings$price))
# Keep only listings priced at HKD 5000 or less.
# The is.na() guard is deliberate: indexing rows with an NA logical would
# insert all-NA rows, so listings without a usable price are dropped instead.
listings <- listings[!is.na(listings$price) & listings$price <= 5000, ]

Creating our sub-dataframes

Outputting the sub-dataframes as and when required

##  [1] "Apartment"              "Guest suite"            "Condominium"           
##  [4] "Bed and breakfast"      "Hostel"                 "Hotel"                 
##  [7] "House"                  "Campsite"               "Aparthotel"            
## [10] "Hut"                    "Loft"                   "Guesthouse"            
## [13] "Serviced apartment"     "Boutique hotel"         "Townhouse"             
## [16] "Other"                  "Bungalow"               "Tiny house"            
## [19] "Cave"                   "Boat"                   "Pension (South Korea)" 
## [22] "Igloo"                  "Chalet"                 "Cottage"               
## [25] "Treehouse"              "Nature lodge"           "Tent"                  
## [28] "Cabin"                  "Villa"                  "Vacation home"         
## [31] "Minsu (Taiwan)"         "Island"                 "Earth house"           
## [34] "Dorm"                   "Houseboat"              "Farm stay"             
## [37] "Castle"                 "Casa particular (Cuba)" "Kezhan (China)"        
## [40] "Resort"

Spatial Data Analysis

# Mean location review score per neighbourhood (missing scores ignored),
# stored with the column names Eng_name / value.
Reviews <- listings %>%
  group_by(Eng_name = neighbourhood_cleansed) %>%
  summarise(value = mean(review_scores_location, na.rm = TRUE))
# Respell this one district name; presumably this matches the spelling in the
# map data that Reviews is later merged with -- confirm against the RDS file.
Reviews$Eng_name[Reviews$Eng_name == "Central & Western"] <- "Central and Western"

library(sp)
hkmap <- readRDS("HKG_adm1_old.rds") # geo data of HK map

# Preprocessing
# Lookup table: polygon id and district name, joined to the per-district value.
map_data <- data.frame(id = hkmap$ID_1, Eng_name = hkmap$NAME_1)
map_data <- merge(map_data, Reviews, by = "Eng_name")
# Convert the spatial object into a plain data frame that ggplot can draw.
hkmapdf <- fortify(hkmap)
map_data <- merge(hkmapdf, map_data, by = "id")
# merge() does not promise to keep the input row order; polygons are only
# drawn correctly if vertices stay in sequence, so restore it using the
# `order` column produced by fortify().
map_data <- map_data[order(map_data$order), ]



# Choropleth of the average location review score per area.
map_bg <- ggplot(map_data, aes(long, lat, group = group, fill = value)) +
  geom_polygon() + # filled area shapes
  # na.rm keeps the colour limits finite even if an area has no usable
  # score (the mean of zero values is NaN).
  scale_fill_gradient(
    limits = range(map_data$value, na.rm = TRUE),
    low = "#9999FF", high = "#330066"
  ) +
  geom_path(colour = "#FFFFFF") + # white outlines between areas
  guides(fill = guide_legend(title = "Rating", title.position = "top")) +
  ggtitle("Map showing Average Location Score by Area") +
  # Strip axes and background: coordinates carry no meaning for the reader.
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(), axis.title.x = element_blank(),
    axis.text.y = element_blank(), axis.title.y = element_blank(),
    axis.ticks = element_blank(), panel.background = element_blank()
  )
map_bg

# Mean listing price per neighbourhood (missing prices ignored),
# stored with the column names Eng_name / value.
Prices <- listings %>%
  group_by(Eng_name = neighbourhood_cleansed) %>%
  summarise(value = mean(price, na.rm = TRUE))

# Respell this one district name; presumably this matches the spelling in the
# map data that Prices is later merged with -- confirm against the RDS file.
Prices$Eng_name[Prices$Eng_name == "Central & Western"] <- "Central and Western"

library(sp)
hkmap2 <- readRDS("HKG_adm1_old.rds") # geo data of HK map

# Preprocessing
# Lookup table: polygon id and district name, joined to the per-district value.
map_data2 <- data.frame(id = hkmap2$ID_1, Eng_name = hkmap2$NAME_1)
map_data2 <- merge(map_data2, Prices, by = "Eng_name")
# Convert the spatial object into a plain data frame that ggplot can draw.
hkmapdf2 <- fortify(hkmap2)
map_data2 <- merge(hkmapdf2, map_data2, by = "id")
# merge() does not promise to keep the input row order; polygons are only
# drawn correctly if vertices stay in sequence, so restore it using the
# `order` column produced by fortify().
map_data2 <- map_data2[order(map_data2$order), ]

# Choropleth of the average listing price per area.
map_bg <- ggplot(map_data2, aes(long, lat, group = group, fill = value)) +
  geom_polygon() + # filled area shapes
  # na.rm keeps the colour limits finite even if an area has no usable
  # price (the mean of zero values is NaN).
  scale_fill_gradient(
    limits = range(map_data2$value, na.rm = TRUE),
    low = "#FF9999", high = "#CC0000"
  ) +
  geom_path(colour = "#FFFFFF") + # white outlines between areas
  # The legend shows prices here, not ratings.
  guides(fill = guide_legend(title = "Price", title.position = "top")) +
  ggtitle("Map showing Average Price by Area") +
  # Strip axes and background: coordinates carry no meaning for the reader.
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(), axis.title.x = element_blank(),
    axis.text.y = element_blank(), axis.title.y = element_blank(),
    axis.ticks = element_blank(), panel.background = element_blank()
  )
map_bg

### Price per person scatter plots

# Price per guest versus overall review rating, coloured by neighbourhood.
Pricesperperson <- select(
  listings,
  "accommodates", "price", "review_scores_rating", "neighbourhood_cleansed"
)

# Nightly price divided by the number of guests the listing accommodates.
Pricesperperson$price_per_pax <-
  Pricesperperson$price / Pricesperperson$accommodates

# Keep rows whose price per guest lies within [20, 10000].
# The comparison must be element-wise: the scalar `||` would test only the
# first row (or raise an error in R >= 4.3). Rows where the ratio is NA/NaN
# are dropped explicitly so they do not turn into all-NA rows.
keep_row <- !is.na(Pricesperperson$price_per_pax) &
  Pricesperperson$price_per_pax >= 20 &
  Pricesperperson$price_per_pax <= 10000
Pricesperperson <- Pricesperperson[keep_row, ]

ggplot(
  data = Pricesperperson,
  mapping = aes(
    x = price_per_pax,
    y = review_scores_rating,
    color = neighbourhood_cleansed
  )
) +
  geom_point()
## Warning: Removed 4291 rows containing missing values (geom_point).

Listings per neighbourhood

# Number of listings per neighbourhood and property type, restricted to the
# five property types compared in this section.
propertydf <- listings %>%
  filter(
    property_type %in% c("Apartment", "House", "Condominium", "Townhouse", "Loft")
  ) %>%
  group_by(neighbourhood_cleansed, property_type) %>%
  summarize(Freq = n())

# Total number of those listings per neighbourhood (sum of the counts above).
totalproperty <- propertydf %>%
  group_by(neighbourhood_cleansed) %>%
  summarize(sum = sum(Freq))

# Share of each property type within its neighbourhood.
propertyratio <- merge(propertydf, totalproperty, by = "neighbourhood_cleansed")
propertyratio$ratio <- propertyratio$Freq / propertyratio$sum

# Stacked horizontal bars: share of each property type per neighbourhood.
# Overridden layers from the earlier version (a second fill scale, a colour
# gradient with no colour aesthetic, duplicate axis labels, a caption theme
# with no caption) are removed; the rendered chart is the same apart from
# the corrected subtitle.
ggplot(
  propertyratio,
  aes(x = neighbourhood_cleansed, y = ratio, fill = property_type)
) +
  geom_col(position = "stack") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(
    "Property Type",
    values = c("#FF3300", "#F0E442", "#009E73", "#56B4E9", "#666666")
  ) +
  ggtitle(
    "Which types of Listings are there in Hong Kong?",
    # The chart shows shares in a bar chart, not counts on a map.
    subtitle = "Share of Listing Type by Neighborhood"
  ) +
  xlab("Neighborhood") +
  ylab("Percentage") +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "bold", color = "grey35"),
    axis.text.x = element_text(size = 8)
  ) +
  coord_flip()

Price Comparisons

Demand and Price Analysis

### Demand Price Analysis

Sentiment Analysis via Review Word Cloud